
%define NUM_PARTICLES 256

section .bss
    ; Trail intensity map: 128 rows of 256 bytes, one byte per cell.
    ; Layout matters: `particles` has to start at the byte directly
    ; after this buffer, because the init code depends on that adjacency.
    trailmap:     resb 128*256

    ; Particle table, 6 bytes per record:
    ;   +0 word posx, +2 word posy, +4 byte velx, +5 byte vely
    particles:    resb NUM_PARTICLES*6

    ; 16-bit frame counter. It lives in .bss, so the program has to zero it
    ; itself at startup (placing it in .data would save that store).
    framecounter: resb 2


section .text
    org 0x100

;-----------------------------------------------------------------------
; Program entry (DOS .COM: CS = DS = ES = SS on entry).
;
; Order of operations:
;   1. reset the frame counter and the string direction flag
;   2. switch to VGA mode 13h (320x200, 256 colours)
;   3. clear the trail map and fill the particle table with PRNG output
;   4. hook int 0x1c (timer tick) with I_main
;   5. idle forever; all further work happens inside I_main
;
; The hook is installed LAST on purpose: I_main uses AX/CX/DI/ES itself,
; so a tick arriving while the fill loop below is still running must not
; be able to reach it.
;-----------------------------------------------------------------------

    cld                             ; stosw below must count upwards
    mov word [framecounter], 0

    ; set video mode to 13h (320x200); AH is set explicitly instead of
    ; hoping it is 0 on entry
    mov ax, 0x0013
    int 0x10

;;; clear_trailmap
    ; start from an all-dark map instead of whatever was left in memory
    mov di, trailmap
    xor ax, ax
    mov cx, (256*128)/2             ; buffer size in words
    rep stosw
;;; end clear_trailmap

;;;; init particles

    ; different prng seed:
    ; The number of clock ticks since midnight will be returned in cx:dx.
    ; There are 0x1800B0 clock ticks per day.
    ; mov ah, 0
    ; int 0x1a
    ; mov ax, dx

    ; Fixed seed. 0x251c is the value the int 0x21 setup further down loads
    ; into AX; the original code ran that call first and let whatever was
    ; left in AX seed the generator. Pinning it makes the start pattern
    ; deterministic. NOTE(review): this matches the old pattern only if
    ; DOS returned AX unchanged from that call - confirm if it matters.
    mov ax, 0x2500 + 0x1c

    mov di, particles               ; explicit, does not rely on .bss adjacency
    mov cx, 3*NUM_PARTICLES         ; 3 words (6 bytes) per particle
    L_test_fill:
        ; LEET PRNG: ax = (ax + 42) * 1337, truncated to 16 bits
        add ax, 42
        imul ax, 1337
        stosw
        loop L_test_fill

;;;; end init particles

    ; set custom ISR for int 0x1c (timer interrupt): DOS "set interrupt
    ; vector" call, AH = 0x25, AL = vector number, DS:DX = handler
    mov ax, 0x2500 + 0x1c
    mov dx, I_main
    int 0x21

;;;; end init stuff

; Nothing left to do in the foreground: sleep until the next interrupt,
; then sleep again. The program never returns to DOS.
halt:
    hlt
    jmp halt



;-----------------------------------------------------------------------
; I_main - int 0x1c (timer tick) handler, called at 18.2 Hz.
;
; One call renders one frame:
;   1. horizontal blur + decay of the 256x128 trail map (in place)
;   2. move every active particle and stamp it into the trail map
;   3. copy the trail map to the centre of the mode 13h screen
;   4. increment framecounter
;
; In:    nothing (segment registers are NOT trusted, see prologue)
; Out:   nothing; all registers are restored before iret
; Uses:  trailmap, particles, framecounter
;-----------------------------------------------------------------------
I_main:

    ; --- prologue -----------------------------------------------------
    ; The handler runs on top of whatever code executed `int 0x1c`, so
    ; every register it touches is saved and put back on exit.
    pusha
    push ds
    push es

    ; DS on entry belongs to the interrupted code and need not be our
    ; segment. CS is: the vector was installed as DS:I_main from the
    ; .COM program, where CS = DS.
    mov ax, cs
    mov ds, ax
    mov es, ax

    cld                             ; lodsb/stosb below must count upwards

    mov si, trailmap
    mov di, si ;saving trailmap address into di

    ;;;; vertical "blur"
        ; actually just swapping vertical pairs of pixels here and there.
        ; the horizontal step will smooth it out, and this does the job of smearing vertically

        ;; it would be nice if these could cycle mod 6, but oh well, bytes...

;;
;;        ;mov cx, 5461 ; floor(128*256/6) ; would be nicer, but would have to handle overflows
;;        mov cx, 5418 ; floor(127*256/6) ; number of pixel pairs to swap, with no overflows at bottom
;;        L_vert_blur:
;;
;;            lodsb ; al is top pixel
;;            xchg al, byte [si+256] ; putting al into bottom pixel and bottom pixel into al
;;            mov [si], al ; writing bottom pixel value back to top pixel location
;;
;;            add si, 6 ; 6 is nice because mod 256, it advances by 2 columns to the right with each row
;;
;;            loop L_vert_blur
;;        mov si, di ; di still has trailmap address in it, restoring it into si
    ;;;; end vertical "blur"


    ;;;; horizontal blur (also does decay)
        ; The map is treated as one flat run of 256*128 bytes (not row by
        ; row), so the left neighbour of a row's first pixel is the last
        ; pixel of the row above.
        ;
        ; Register roles inside the loop:
        ;   dl = prev (original value of the pixel to the left), dh = 0
        ;   bl = curr / next scratch,                            bh = 0
        ;   si = read pointer, di = write pointer (one behind si after inc)

        mov dl, [si+256*128-1] ; "prev" for pixel 0 = bottom right corner (last byte of the map)
        xor dh, dh ; will stay 0
        xor bh, bh ; will stay 0

        mov cx, 128*256
        L_horiz_blur:
            ; doing floor((prev + curr + curr - floor(curr / 8) + next) / 4)

            mov ax, dx ; starting with prev

            mov bl, [si] ; reading curr
            add ax, bx  ; adding it twice
            add ax, bx
            mov dl, bl  ; curr becomes "prev" of the next iteration

            shr bl, 3  ; do a bit more decay
            sub ax, bx ; by subtracting a part of curr

            mov bl, dl ; default "next" = curr; only survives for the very last pixel

            inc si

            ; counter is 1 on the last pixel of the map: there is no "next"
            ; inside the buffer (the byte after it is particle data), so
            ; keep bl = curr instead of reading out of bounds
            cmp cx, 1
            je B_skip_read_next
                mov bl, [si] ; reading next
            B_skip_read_next:

            add ax, bx ; adding next
            shr ax, 2 ; dividing by 4

            stosb      ; write result over curr (es:di trails si by one)
            loop L_horiz_blur

    ;;;; end horizontal blur
    endblur:

    ;;;; handle particles

        ; Number of particles processed this frame:
        ;   framecounter <= 360 : framecounter/64 + 1   (1..6, slow ramp-up)
        ;   framecounter >  360 : all NUM_PARTICLES
        ; The compare is unsigned: framecounter is a free-running 16-bit
        ; counter, and a signed compare would take the ramp-up branch again
        ; once it passes 0x7FFF, yielding a count far above NUM_PARTICLES.
        mov cx, NUM_PARTICLES

        cmp word [framecounter], 360
        ja allparticles

            mov cx, [framecounter]
            shr cx, 6
            inc cx

        allparticles:


        mov si, particles ; si walks the 6-byte particle records
        L_foreach_particle:
            push cx

            ;;;; sense
            ; DISABLED: the jmp skips this whole section, nothing between
            ; here and `endsense` ever executes. Left as-is (work in progress).
            ; NOTE(review), to sort out before enabling it:
            ;   - dl/dh are compared as left/right trail samples but are never
            ;     loaded from the trail map
            ;   - the `mov cx, [si+2]` before the compare reads posy, while its
            ;     comment and the final store both refer to velx/vely at [si+4]
            ;   - jg/jl are signed compares on what look like 0..255 samples
            jmp endsense
                mov ax, [si] ; posx
                mov bx, [si+2] ; posy
                mov cx, [si+4] ; velx + vely (little endian means that velx is in cl and vely in ch)


                and bx, 0x7FFF ; there are only 128 rows, wraparound posy

                mov bh, al
                mov bl, 0
                sub bl, ah

                ;sar bl, 1 ; sideways half-length ax
                ;sar bh, 1

                mov cx, [si] ; sensor position, starting with particle pos
                add cl, al
                add ch, ah

                add cl, bl
                add ch, bh



                ; dl is the trail sample left, dh is the trail sample right


                mov cx, [si+2] ; loads velx into cl vely into ch

                cmp dl, dh
                jg B_turn_left
                jl B_turn_right
                jmp B_turn_end



                B_turn_left:
                    add cl, bl
                    add ch, bh
                    jmp B_turn_end

                B_turn_right:
                    sub cl, bl
                    sub ch, bh

                B_turn_end:


                mov [si+4], cx ; stores cl into velx and ch into vely


            ;;; end sense
            endsense:

            ;;; move
                ; positions are 16-bit values whose high byte is the cell
                ; coordinate; velocities are signed bytes added to the full
                ; 16-bit position

                ; could shift right by fewer bits for faster dots, keeping low bits as noise,
                ; but then sometimes they would skip rows, really making vertical blur necessary...

                ; x
                mov bh, [si+4] ; load velx
                sar bx, 8 ; fill bh with sign bit, shift it into bl
                add [si], bx

                ; y
                mov bh, [si+5] ; load vely
                sar bx, 8 ; fill bh with sign bit, shift it into bl
                add [si+2], bx

                and word [si+2], 0x7FFF ; there are only 128 rows, wraparound posy

            ;;; end move


            ;;; deposit

                mov al, [si+1]   ; the high bits of posx will be the column, so low bits of ax
                mov ah, [si+3]   ; the high bits of posy will be the row, so high bits of ax
                mov di, trailmap
                add di, ax ; the 256 columns make indexing neat: offset = row*256 + column
                mov byte [di], 0xFF-8 ; just set it to high, no overflows (the -8 is because of the rounding while blitting)

            ;;; end deposit

        pop cx
        add si, 6 ; next particle record
        loop L_foreach_particle

    ;;;; end handle particles


    ;;;; copy trail map to screen ;;;;

        ; Set ES to video memory segment
        mov ax, 0xA000
        mov es, ax

        ; Calculate offset in video memory to center the bitmap
        ; (320-256)/2 = 32 pixels horizontally
        ; (200-128)/2 = 36 pixels vertically
        mov di, (36*320)+32   ; Destination offset in video memory

        ; Set DS:SI to the source bitmap data
        mov si, trailmap

        ; Palette base, decided once per frame instead of once per pixel:
        ; 16 for the first 1000 frames, 32 afterwards ("rainbow").
        ; Unsigned compare, same reasoning as for the particle count.
        mov dl, 16
        cmp word [framecounter], 1000
        jb norainbow
        add dl, 16
        norainbow:

        mov cx, 128  ; number of rows
        L_copy_row:
            push cx

            mov cx, 256           ; Number of bytes per row to copy
            L_copy_pixel:
                lodsb

                xor ah, ah  ; rounding - optional
                add ax, 8

                shr al, 4 ; 0..255 to 0..15

                add al, dl ; shift into the selected 16-entry palette range

                stosb
                loop L_copy_pixel

            ; Add offset to DI to jump to the next line
            add di, 320-256        ; 320 (screen width) - 256 (bitmap width)

            pop cx
            loop L_copy_row

    ;;;; end copy trail map ;;;;

    inc word [framecounter] ; free-running, wraps to 0 after 65536 frames

    ; --- epilogue: restore everything saved in the prologue ---
    pop es
    pop ds
    popa

iret ; I_main is an interrupt handler, so we must return with iret

